Fechar

@Comment{
  PhD thesis record for Rodrigues (2024), INPE.
  Notes for maintainers:
  - The export carried a full ISO date (2023-12-15) in the month field.
    It is preserved in the non-standard field defensedate, which standard
    styles ignore. It is presumably the thesis defense date; confirm
    against the repository record. No month field is given because the
    publication month is not known from this record.
  - Percent signs in the abstract are escaped so the field is safe to
    typeset.
  - committee, englishtitle, ibi, targetfile and urlaccessdate are
    repository-specific fields that standard styles ignore.
}
@PhDThesis{Rodrigues:2024:MaLeHa,
               author = "Rodrigues, Marcos Lima",
                title = "Machine learning e hashing para identifica{\c{c}}{\~a}o de
                         imagens de sensoriamento remoto baseada em conte{\'u}do",
               school = "Instituto Nacional de Pesquisas Espaciais (INPE)",
                 year = "2024",
              address = "S{\~a}o Jos{\'e} dos Campos",
          defensedate = "2023-12-15",
             keywords = "recupera{\c{c}}{\~a}o de imagens baseada em conte{\'u}do, redes
                         neurais convolucionais, euroSAT, uso e cobertura da terra,
                         Cerrado, content-based image retrieval (CBIR), deep hashing neural
                         network (DHNN), euroSAT, land use and land cover (LULC), the
                         brazilian savanna (Cerrado)",
             abstract = "Neste trabalho {\'e} apresentado o desenvolvimento de uma
                         solu{\c{c}}{\~a}o (framework) para busca e
                         recupera{\c{c}}{\~a}o de imagens de sat{\'e}lite baseadas em
                         conte{\'u}do, com potencial para aplica{\c{c}}{\~a}o no escopo
                         de grandes conjuntos de dados. A {\'a}rea de sensoriamento remoto
                         (SR) para observa{\c{c}}{\~a}o da Terra tem experimentado um
                         grande desenvolvimento na {\'u}ltima d{\'e}cada, dando origem a
                         era do Remote Sensing Big Data (RSBD), tornando desafiadora a
                         tarefa de recuperar imagens {\'u}teis nesse grande volume de
                         dados, por exemplo, que possam ser usadas para estudos de uso e
                         cobertura da terra no Cerrado brasileiro. Nesse contexto, o
                         desenvolvimento de sistemas baseados em Content-Based Image
                         Retrieval (CBIR) apoiado por m{\'e}todos de Deep Learning como as
                         Convolutional Neural Networks (CNNs), t{\^e}m sido empregados com
                         sucesso a dados multifontes e multiespectrais (MS). As
                         arquiteturas Deep Hashing Neural Networks (DHNNs) empregam CNNs
                         para extra{\c{c}}{\~a}o de atributos de imagens e convers{\~a}o
                         desses atributos em c{\'o}digos bin{\'a}rios (hash codes) para
                         cria{\c{c}}{\~a}o de um espa{\c{c}}o m{\'e}trico otimizado
                         para CBIR no escopo do RSBD. A Metric-Learning-Based Deep Hashing
                         Network (MiLaN) representa o estado da arte desse tipo de
                         arquitetura, baseada na combina{\c{c}}{\~a}o de tr{\^e}s
                         fun{\c{c}}{\~o}es de perda que permitem o aprendizado de um
                         espa{\c{c}}o m{\'e}trico ideal para a recupera{\c{c}}{\~a}o de
                         imagens baseada em conte{\'u}do (Semantic-Based Metric Space).
                         Originalmente a rede MiLaN adotou como m{\'o}dulo de
                         extra{\c{c}}{\~a}o de caracter{\'{\i}}sticas das imagens
                         (backbone) a rede Inception V3 pr{\'e}-treinada com dados fora do
                         dom{\'{\i}}nio do SR (ImageNet), isso implica em
                         limita{\c{c}}{\~o}es devido a diferen{\c{c}}as t{\'{\i}}picas
                         entre as imagens como a resolu{\c{c}}{\~a}o espacial e
                         influ{\^e}ncia da atmosfera nas imagens orbitais. O framework
                         proposto possibilitou avan{\c{c}}os em rela{\c{c}}{\~a}o {\`a}
                         abordagem original da MiLaN ao adotar um novo backbone baseado na
                         ResNet-50 e realizar o processo de ajuste dessas arquiteturas
                         (MiLaN+ResNet-50) atrav{\'e}s do fine-tuning baseado em imagens
                         satelitais MS. Esta afirma{\c{c}}{\~a}o {\'e} evidenciada pelos
                         resultados expressivos alcan{\c{c}}ados para tarefa CBIR medidos
                         atrav{\'e}s da m{\'e}trica mean Average Precision - mAP, o
                         desempenho global baseado nas 100 primeiras imagens recuperadas
                         (mAP@100) foi de 99,8873\% para o conjunto EuroSAT MS (Sentinel 2 -
                         13 bandas). De maneira particular foi demonstrado que os dados MS
                         fornecem informa{\c{c}}{\~o}es sem{\^a}nticas de qualidade
                         durante o processo de extra{\c{c}}{\~a}o de
                         caracter{\'{\i}}sticas usando a ResNet-50, contribuindo assim
                         para corre{\c{c}}{\~a}o de erros em rela{\c{c}}{\~a}o {\`a}
                         discrimina{\c{c}}{\~a}o de imagens que apresentam padr{\~o}es
                         geom{\'e}tricos ({\'A}reas Industriais/Residenciais) e de
                         textura (Floresta, Pastagem e Culturas Permanente) similares
                         quando utilizado somente as bandas RGB das imagens de m{\'e}dia
                         resolu{\c{c}}{\~a}o do conjunto EuroSAT. O desempenho para o
                         conjunto EuroSAT MS superou o apresentado por outros m{\'e}todos
                         do estado da arte para realiza{\c{c}}{\~a}o de CBIR, inclusive
                         utilizando imagens a{\'e}reas de alta resolu{\c{c}}{\~a}o
                         espacial do conjunto Aerial Image Dataset (AID). ABSTRACT: This
                         work presents the development of a framework for searching and
                         retrieving content-based satellite images, with potential for
                         application in the scope of large datasets. The area of remote
                         sensing (RS) for Earth observation has experienced great
                         development in the last decade, giving rise to the era of Remote
                         Sensing Big Data (RSBD), making the task of retrieving useful
                         images from this large volume of data challenging, for example,
                         that can be used for studies of land use and land cover in the
                         Brazilian Cerrado. In this context, the development of systems
                         based on Content-Based Image Retrieval (CBIR) supported by Deep
                         Learning methods such as Convolutional Neural Networks (CNNs),
                         have been successfully applied to multisource and multispectral
                         (MS) data. Deep Hashing Neural Networks (DHNNs) architectures
                         employ CNNs to extract image attributes and convert these
                         attributes into binary codes (hash codes) to create a metric space
                         optimized for CBIR within the scope of RSBD. The
                         Metric-Learning-Based Deep Hashing Network (MiLaN) represents the
                         state of the art of this type of architecture, based on the
                         combination of three loss functions that allow the learning of a
                         space ideal metric for CBIR (Semantic-Based Metric Space).
                         Originally, the MiLaN network adopted the Inception V3 network
                         pre-trained with data outside the RS domain (ImageNet) as an image
                         feature extraction module (backbone), this implies limitations due
                         to typical differences between images such as the spatial
                         resolution and influence of the atmosphere on orbital images. The
                         proposed framework enabled advances in the original MiLaN approach
                         by adopting a new backbone based on ResNet-50 and carrying out the
                         adjustment process of these architectures (MiLaN+ResNet-50)
                         through fine-tuning based on MS satellite images. This statement
                         is evidenced by the expressive results achieved for the CBIR task
                         measured using the mean Average Precision (mAP) metric, the global
                         performance based on the top-100 recovered images (mAP@100) was
                         99.8873\% for the set EuroSAT MS (Sentinel 2 - 13 bands). In
                         particular, it was demonstrated that MS data provides quality
                         semantic information during the feature extraction process using
                         ResNet-50, thus contributing to error correction concerning the
                         discrimination of images that present geometric patterns
                         (Industrial/Residential Areas) and texture (Forest, Pasture and
                         Permanent Crops) similar when using only the RGB bands of the
                         medium resolution images from the EuroSAT set. The performance for
                         the EuroSAT MS dataset surpassed that presented by other
                         state-of-the-art methods for carrying out CBIR, including using
                         high spatial resolution aerial images from the Aerial Image
                         Dataset (AID).",
            committee = "Gomes, Karine Reis Ferreira (presidente) and K{\"o}rting, Thales
                         Sehn (orientador) and Queiroz, Gilberto Ribeiro de (orientador)
                         and Negri, Rog{\'e}rio Galante and Noma, Alexandre",
         englishtitle = "Machine learning and hashing for content-based image retrieval
                         (CBIR) of remote sensing images",
             language = "pt",
                pages = "106",
                  ibi = "8JMKD3MGP3W34T/4ADRCA2",
                  url = "http://urlib.net/ibi/8JMKD3MGP3W34T/4ADRCA2",
           targetfile = "publicacao.pdf",
        urlaccessdate = "28 abr. 2024"
}


Fechar